# 猫狗大战简化版
import tensorflow as tf
import random
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import misc
import cv2


tf.set_random_seed(777) # set TensorFlow's graph-level random seed

# Dataset sizing
all_num = 1000  # total number of samples used (train + test)
split_num = int(0.9 * all_num)  # index separating train from test (90/10)
train_num = split_num  # number of training samples
test_num = all_num - train_num  # number of test samples

# Width and height (in pixels) every input image is resized to. Must not be
# too small, otherwise the images become unrecognisable.
IMGSIZE = 100
def get_all_files(file_path, n_train=None, n_test=None):
    """Collect labelled image paths from a directory and split them.

    File names are expected to look like ``cat.0.jpg``: a name starting with
    ``cat`` is labelled ``[1, 0]`` and every other file is labelled ``[0, 1]``
    (dog). Entries that are not regular files (e.g. sub-directories) are
    skipped.

    Args:
        file_path: a string, the directory that contains the images.
        n_train: number of training samples to return; defaults to the
            module-level ``train_num``.
        n_test: number of test samples to return; defaults to the
            module-level ``test_num``.

    Returns:
        A tuple ``(image_list, label_list, image_test, label_test)`` of NumPy
        arrays: training paths, training one-hot labels, test paths and test
        one-hot labels. The training and test parts never overlap.

    Raises:
        ValueError: if a split size is negative or the directory holds fewer
            than ``n_train + n_test`` files (the two parts would overlap).
    """
    if n_train is None:
        n_train = train_num
    if n_test is None:
        n_test = test_num

    image_list = []
    label_list = []
    cat_count = 0
    dog_count = 0
    # os.listdir returns entries in arbitrary order; sorting makes the seeded
    # shuffle below select the same samples on every machine.
    for item in sorted(os.listdir(file_path)):
        item_path = os.path.join(file_path, item)
        if not os.path.isfile(item_path):
            continue
        if item[:3] == 'cat':  # cat -> class 0
            label_list.append([1, 0])
            cat_count += 1
        else:  # dog -> class 1
            label_list.append([0, 1])
            dog_count += 1
        image_list.append(item_path)
    print('数据集中有%d只猫,%d只狗.' % (cat_count, dog_count))

    total = len(image_list)
    if n_train < 0 or n_test < 0:
        raise ValueError('split sizes must be non-negative, got n_train=%r, '
                         'n_test=%r' % (n_train, n_test))
    if n_train + n_test > total:
        raise ValueError('need %d files for the train/test split but %r '
                         'contains only %d'
                         % (n_train + n_test, file_path, total))

    # Shuffle paths and labels with the same fixed-seed permutation.
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(total))
    x_shuffled = np.array(image_list)[shuffle_indices]
    y_shuffled = np.array(label_list)[shuffle_indices]

    # Training samples come from the front, test samples from the back. An
    # explicit start index is used because x[-0:] would return everything.
    test_start = total - n_test
    image_list = x_shuffled[:n_train]
    label_list = y_shuffled[:n_train]
    image_test = x_shuffled[test_start:]
    label_test = y_shuffled[test_start:]
    return image_list, label_list, image_test, label_test

# Directory of labelled training images that is passed to get_all_files.
image_dir = r'../../../../large_data/DL1/catdog_data/data/train'
# NOTE(review): test_dir is not referenced anywhere else in the visible file.
test_dir = r'E:\八维\丁博_深度学习\tensorflow补充\data\test'
# The files in image_dir carry their label in the file name; get_all_files
# returns the train/test path arrays and their one-hot label arrays.
train_list,Y_one_hot,test_list,Y_test = get_all_files(image_dir)
print(train_list.shape,Y_one_hot.shape)

# Read image files
def readimg(file):
    """Load an image, resize it to IMGSIZE x IMGSIZE and scale it to [0, 1].

    Args:
        file: path of the image file to read.

    Returns:
        The resized image as a floating-point array with pixel values divided
        by 255. The image is read with OpenCV's default flags, i.e. as a
        3-channel image in BGR channel order.

    Raises:
        IOError: if OpenCV cannot read or decode the file.
    """
    image = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than inside cv2.resize.
    if image is None:
        raise IOError('cannot read image file: %r' % (file,))
    image = cv2.resize(image, (IMGSIZE, IMGSIZE))
    image = image / 255  # normalise pixel values
    return image

def _load_images(paths, count, tag):
    """Read the first `count` images of `paths`, logging every 100th index."""
    loaded = []
    for idx in range(count):
        loaded.append(readimg(paths[idx]))
        if idx % 100 == 0:
            print(tag, idx)
    return loaded


imgs = _load_images(train_list, train_num, 'read train ')  # training images
imgs_test = _load_images(test_list, test_num, 'read test ')  # test images
imgArr = np.array(imgs)  # training set as one array
imgArrTest = np.array(imgs_test)  # test set as one array

g_b = 0  # read cursor into the training arrays, advanced by next_batch


def next_batch(size):
    """Return the next `size` training samples and advance the cursor.

    Slices `imgArr` and `Y_one_hot` starting at the module-level cursor `g_b`
    and then moves the cursor forward by `size`. The cursor does not wrap
    around: once it is past the end of the data the slices are short or
    empty, so the caller has to reset `g_b` to 0 to start over.

    Returns:
        A tuple (images, labels) for the batch.
    """
    global g_b
    begin, end = g_b, g_b + size
    batch = (imgArr[begin:end], Y_one_hot[begin:end])
    g_b = end
    return batch
# Hyperparameters
learning_rate = 1e-4  # step size passed to the optimizer
training_epochs = 100  # number of passes over the training set
batch_size = 100  # samples per training step
# Placeholders
X = tf.placeholder(tf.float32, [None, IMGSIZE, IMGSIZE, 3])
Y = tf.placeholder(tf.float32, [None, 2])  # one-hot labels
# Dropout keep probability. It defaults to 1.0 (dropout off) so that accuracy
# evaluation and prediction are not randomly perturbed; only the training
# steps feed train_keep_prob.
keep_prob = tf.placeholder_with_default(1.0, shape=[], name='keep_prob')
train_keep_prob = 0.9

with tf.variable_scope('conv1'):  # conv layer 1, input (?, IMGSIZE, IMGSIZE, 3)
    # 3x3 kernel, 3 input channels, 16 output channels.
    # NOTE(review): this uses random_normal's default stddev (1.0) while W2
    # uses stddev=0.01 -- confirm the large initial scale is intended.
    W1 = tf.Variable(tf.random_normal([3, 3, 3, 16]))
    # Convolution output: (?, IMGSIZE, IMGSIZE, 16)
    L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
    L1 = tf.nn.relu(L1)
    # Pooling output: (?, IMGSIZE/2, IMGSIZE/2, 16)
    L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
with tf.variable_scope('conv2'):  # conv layer 2, input (?, IMGSIZE/2, IMGSIZE/2, 16)
    # 3x3 kernel, 16 input channels, 16 output channels.
    W2 = tf.Variable(tf.random_normal([3, 3, 16, 16], stddev=0.01))
    # Convolution output: (?, IMGSIZE/2, IMGSIZE/2, 16)
    L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
    L2 = tf.nn.relu(L2)
    # Pooling output: (?, IMGSIZE/4, IMGSIZE/4, 16)
    L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # Flatten each feature map stack into a vector.
    dim = L2.get_shape()[1].value * L2.get_shape()[2].value * L2.get_shape()[3].value
    L2_flat = tf.reshape(L2, [-1, dim])
with tf.variable_scope('fc1'):  # fully connected layer 1
    W3 = tf.get_variable("W3", shape=[dim, 128], initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.Variable(tf.random_normal([128]))
    L3 = tf.nn.relu(tf.matmul(L2_flat, W3) + b3)
with tf.variable_scope('fc2'):  # fully connected layer 2
    W4 = tf.get_variable("W4", shape=[128, 128], initializer=tf.truncated_normal_initializer(stddev=0.005))
    b4 = tf.get_variable("b4", shape=[128], initializer=tf.constant_initializer(0.1))
    L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
    # keep_prob is fed per run; it was previously hard-coded to 0.9, which
    # kept dropout active during evaluation as well.
    L4 = tf.nn.dropout(L4, keep_prob=keep_prob)
with tf.variable_scope('softmax'):  # output layer producing the class logits
    W5 = tf.get_variable("w5", shape=[128, 2], initializer=tf.truncated_normal_initializer(stddev=0.005))
    b5 = tf.get_variable("b5", shape=[2], initializer=tf.constant_initializer(0.1))
    logits = tf.add(tf.matmul(L4, W5), b5)

# Cost / loss function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # optimizer

# Fraction of samples whose predicted class matches the label.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Create the session
sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialise all variables

# Training loop
print('开始学习...')
total_batch = train_num // batch_size  # full batches per epoch
for epoch in range(training_epochs):
    avg_cost = 0
    g_b = 0  # rewind the cursor read by next_batch to the start of the data
    for i in range(total_batch):
        batch_xs, batch_ys = next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: train_keep_prob}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    if epoch % 20 == 0:
        # keep_prob is not fed here, so dropout is disabled for evaluation.
        acc = sess.run(accuracy, feed_dict={X: imgArrTest, Y: Y_test})
        print('Epoch:', (epoch + 1), 'cost =', avg_cost, 'acc=', acc)
print('学习完成')

# Evaluate the trained model on the test set
print('Accuracy:', sess.run(accuracy, feed_dict={X: imgArrTest, Y: Y_test}))

# Pick one random test sample and compare its label with the prediction
r = random.randint(0, test_num - 1)
# The label is already a NumPy array, so no graph op is needed to decode it.
print("Label: ", np.argmax(Y_test[r:r + 1], 1))
print("Prediction: ", sess.run(tf.argmax(logits, 1), feed_dict={X: imgArrTest[r:r + 1]}))